* Set $root 
project figstabs, root
if (r(buildrunning)==0) include "${root}/code/config_interactive.do"

* Set globals
project, uses("${root}/code/set_globals.do")
include "${root}/code/set_globals.do"

* Create required subfolders
local category "Employment"

cap mkdir "${root}/data/derived"
cap mkdir "${root}/data/derived/`category'"
cap mkdir "${root}/results/`category'"
cap mkdir "${root}/results/paper numbers"
cap mkdir "${root}/results/paper numbers/`category'"

********************************************************************************
**# 1. Further cleaning for Paychex data 
********************************************************************************

/*
Reweight firms on 2-digit NAICS codes such that the (worker-weighted) distribution of 2-digit NAICS codes within Paychex firms in each size bin
matches the national distribution of 2-digit NAICS codes among Paychex firms in the period January 4-31 2020. 
*/

foreach spec in "eligible" "eligible_alt" {
	
	project, uses("${root}/data/dvc/Employment/PPP_Paychex_Earnin_Combined.dta")
	use "${root}/data/dvc/Employment/PPP_Paychex_Earnin_Combined.dta", clear
	gisid countyfips naics quartile size date
	
	*--------------------------------------------------------------------------*
	* Define eligibility groups 
	*--------------------------------------------------------------------------*
	gen eligible = 1 if inrange(size, 100, 499)
	replace eligible = 0 if inrange(size, 500, 799)
	assert !mi(eligible)
	
	gen eligible_alt = 1 if inrange(size, 300, 499)
	replace eligible_alt = 0 if inrange(size, 500, 699)

	* Matters for winsorizing - if we do not do this, there will be a greater mass of zeroes at the left tail
	drop if mi(`spec')
	
	*--------------------------------------------------------------------------*
	* Reweighting by industry 
	*--------------------------------------------------------------------------*
	* Collapse from county X NAICS X wage X firm size bin X week level to county X NAICS X wage X PPP eligibility X week level 
	gcollapse (rawsum) base (mean) emp_index_combined [w = base], by(countyfips naics quartile `spec' date)
	
	* Get share of employment by NAICS in Jan 2020, overall; for 100-499 cells; for 500-799 cells 
	gegen tot_emp_naics_eligible = total(base), by(naics `spec')
	gegen tot_emp_eligible = total(base), by(`spec')
	gegen tot_emp_naics = total(base), by(naics)
	gegen tot_emp = total(base)

	gen naics_emp_share = (tot_emp_naics / tot_emp) / (tot_emp_naics_eligible / tot_emp_eligible)
	assert !mi(naics_emp_share)
	drop tot_emp_naics_eligible tot_emp_eligible tot_emp_naics tot_emp

	* Rescale weights by NAICS share 
	replace base = base * naics_emp_share
	
	* Check that industry composition is now equal for treated vs. control firms 
	preserve 
	gegen tot_emp_naics_eligible = total(base), by(naics `spec')
	gegen tot_emp_eligible = total(base), by(`spec')
	gegen tot_emp_naics = total(base), by(naics)
	gegen tot_emp = total(base)
	
	gen naics_emp_share_eligible = tot_emp_naics_eligible / tot_emp_eligible
	keep naics_emp_share_eligible naics `spec'
	gduplicates drop
	greshape wide naics_emp_share_eligible, i(naics) j(`spec')
	assert inrange(naics_emp_share_eligible0, naics_emp_share_eligible1 - 1E-7, naics_emp_share_eligible1 + 1E-7)
	restore

	* Employment index, winsorized 
	foreach var in "emp_index_combined" {
		assert !mi(`var')
		sum `var' [w = base], d 
		replace `var' = r(p99) if `var' > r(p99)
	}

	* DiD 
	gen treatment_period = inrange(date, mdy(4, 3, 2020), mdy(9, 1, 2020))
	gen did = (`spec' == 1 & treatment_period == 1)
	
	tempfile ppp_`spec'
	save `ppp_`spec'', replace
}

********************************************************************************
**# 2. Regressions 
********************************************************************************
foreach spec in "eligible" "eligible_alt" {
	
	use `ppp_`spec'', clear

	* Prepare event study coefficients
	glevelsof date, local(dates)
	foreach week of local dates {
		gen did_`week' = (`spec' == 1 & date == `week')
	}

	* Drop one date to avoid collinearity - Jan 3 
	drop did_`=mdy(1, 3, 2020)'

	foreach var in "emp_index_combined" {
		preserve 
		
		* Mean of control series in each week
		gen control_mean = .

		foreach week of local dates {
			sum `var' [w = base] if date == `week' & `spec' == 0
			replace control_mean = r(mean) if date == `week'
		}
		
		* ATT:  DiD at the county-NAICS-wage quartile-PPP eligibility-week level, with county-wage quartile-week FEs, restricting to Mar 11 - Aug 15 and weighting by base employment
		reghdfe `var' did i.`spec' [w = base] if inrange(date, mdy(3, 11, 2020), mdy(8, 15, 2020)), ///
			absorb(i.countyfips#i.quartile#i.date) vce(cluster i.countyfips#i.naics#i.`spec')
		local beta_`spec': di %03.2f _b[did]
		local se_`spec': di %03.2f _se[did]

		* Event study 
		reghdfe `var' did_* i.`spec' [w = base], ///
			absorb(i.countyfips#i.quartile#i.date) vce(cluster i.countyfips#i.naics#i.`spec')

		gen beta = control_mean
		foreach week of local dates {
			if `week' != mdy(1, 3, 2020) replace beta = _b[did_`week'] + control_mean if date == `week'
		}

		gcollapse control_mean beta, by(date)
	
		* Graph options
		if "`spec'" == "eligible" {
			local treat_lab "100-499 Employees"
			local control_lab  "500-799 Employees"
			local ylabel `"ylab(0 "0%" -10 "-10%" -20 "-20%" -30 "-30%", nogrid)"'
			local text_ypos = 3
		}
	
		else {
			local treat_lab "300-499 Employees"
			local control_lab  "500-699 Employees"
			local ylabel `"ylab(10 "+10%" 0 "0%" -10 "-10%" -20 "-20%" -30 "-30%", nogrid)"'
			local text_ypos = 13
		}

		tw ///
		(connect beta date if date <= mdy(9, 1, 2020), lcolor(oi1) mcolor(oi1)) ///
		(connect control_mean date if date <= mdy(9, 1, 2020), lcolor(oi2) mcolor(oi2)), ///
		xtitle("") ///
		xline(`=mdy(4, 3, 2020)', lpattern(dash) lcolor(gs8)) ///
		xline(`=mdy(8, 15, 2020)', lpattern(dash) lcolor(gs8)) ///
		ytitle("Change in Employment (%)" "Relative to January 2020") ///
		xlab(`=mdy(1, 1, 2020)' `""Jan" "2020""' `=mdy(2, 1, 2020)' "Feb" `=mdy(3, 1, 2020)' "Mar" ///
		`=mdy(4, 1, 2020)' "Apr" `=mdy(5, 1, 2020)' "May" `=mdy(6, 1, 2020)' "Jun" `=mdy(7, 1, 2020)' "Jul" ///
		`=mdy(8, 1, 2020)' "Aug" `=mdy(9, 1, 2020)' "Sep") ///
		`ylabel' ///
		text(`text_ypos' `=mdy(4, 3, 2020)' "April 3", color(black) size(small)) ///
		text(`=`text_ypos' - 10' `=`=mdy(4, 3, 2020)' + (`=mdy(8, 15, 2020)' - `=mdy(4, 3, 2020)') / 2' "Estimated Effect on Firms with 100-499 Employees:" "`beta_`spec'' p.p. (s.e. = `se_`spec'' p.p.)", color(black) size(small)) ///
		text(`text_ypos' `=mdy(8, 15, 2020)' "August 15", color(black) size(small)) ///
		graphr(margin(r+5)) ///
		legend(label(1 "`treat_lab'") label(2 "`control_lab'") row(1) size(small))
		
		* Export just the combined Paychex-Earnin version with the 100-800 employee bandwidth 
		if "`spec'" == "eligible" & "`var'" == "emp_index_combined" oi_graph_export "${root}/results/Employment/Change in Employment by PPP Eligibility", type(${fig_type})
		
		restore 
	}
}

********************************************************************************
**# 3. Scalars for paper
********************************************************************************

* Mean decline in employment in control group
use `ppp_eligible', clear
sum emp_index_combined [w = base] if date == mdy(8, 14, 2020) & eligible == 0
local control_decline_aug15: di %3.1f abs(r(mean))

* Jobs saved by PPP in percentage terms (not adjusting for attenuation bias)
local jobs_saved_pct: di %2.1f `beta_eligible' / `control_decline_aug15' * 100

* LATE (ATT * 1.35 - see Appendix L.2 for how we get 1.35)
local beta_eligible_late: di %3.1f `beta_eligible' * 1.35

* LATE expressed in shares rather than percentage points, rounded to 2 d.p.
local beta_eligible_late_shares: di %03.2f `beta_eligible_late' / 100

* Jobs saved by the PPP in millions (adjusting for attenuation bias - see Appendix L.2 for how we get 53.6M)
local jobs_saved: di %3.2f `beta_eligible_late_shares' * 53.6

* Cost per job saved (see Appendix L.2 for how we get 486 billion)
local cost_per_job_saved: di %7.0fc (486 * 1E9) / (`jobs_saved' * 1E6)

* Cost per job saved; upper 95% CI
local jobs_saved_upperCI = (`beta_eligible' + `se_eligible' * 1.96) * 1.35 / 100 * 53.6
local cost_per_job_saved_upperCI: di %6.0fc (486 * 1E9) / (`jobs_saved_upperCI' * 1E6)

* Cost per job saved, net of UI payments (18,350 - see Appendix L.2)
local cost_per_job_saved_w_UI: di %7.0fc (486 * 1E9) / (`jobs_saved' * 1E6) - 18350

* Cost per job saved; upper 95% CI, net of UI payments 
local cost_per_job_saved_upperCI_w_UI: di %6.0fc (486 * 1E9) / (`jobs_saved_upperCI' * 1E6) - 18350

* Jobs saved by the PPP in millions, adjusting for attenuation bias and including NAICS 72 (see Appendix L.2 for how we get 86.1%)
local jobs_saved_w_naics72: di %3.2f `beta_eligible_late_shares' * 53.6 * (1 / 0.861)

* Export scalars for paper 
cap erase "${root}/results/paper numbers/`category'/PPP estimates.yaml"

yamlout using "${root}/results/paper numbers/`category'/PPP estimates.yaml", ///
	key("ppp_eligible_beta") ///
	comment("ATT (beta) using 100-800 employee bandwidth") ///
	value("`beta_eligible'") 
	
yamlout using "${root}/results/paper numbers/`category'/PPP estimates.yaml", ///
	key("ppp_eligible_se") ///
	comment("ATT (s.e.) using 100-800 employee bandwidth") ///
	value("`se_eligible'")

yamlout using "${root}/results/paper numbers/`category'/PPP estimates.yaml", ///
	key("ppp_control_decline_aug15") ///
	comment("Mean decline in employment in 500-800 employee firms by Aug 15") ///
	value("`control_decline_aug15'") 

yamlout using "${root}/results/paper numbers/`category'/PPP estimates.yaml", ///
	key("ppp_jobs_saved_pct") ///
	comment("Jobs saved (%) by Aug 15") ///
	value("`jobs_saved_pct'") 

yamlout using "${root}/results/paper numbers/`category'/PPP estimates.yaml", ///
	key("ppp_eligible_late") ///
	comment("LATE using 100-800 employee bandwidth") ///
	value("`beta_eligible_late'") 

yamlout using "${root}/results/paper numbers/`category'/PPP estimates.yaml", ///
	key("ppp_eligible_late_shares") ///
	comment("LATE using 100-800 employee bandwidth (shares instead of percentage points)") ///
	value("`beta_eligible_late_shares'") fmt(%03.2f)

yamlout using "${root}/results/paper numbers/`category'/PPP estimates.yaml", ///
	key("ppp_jobs_saved") ///
	comment("Jobs saved (millions, adjusting for attenuation bias) by Aug 15") ///
	value("`jobs_saved'") 

yamlout using "${root}/results/paper numbers/`category'/PPP estimates.yaml", ///
	key("ppp_cost_per_job_saved") ///
	comment("Cost per job saved (using LATE estimate, adjusting for attenuation bias) by Aug 15") ///
	value("`cost_per_job_saved'") 

yamlout using "${root}/results/paper numbers/`category'/PPP estimates.yaml", ///
	key("ppp_cost_per_job_saved_upperCI") ///
	comment("Cost per job saved (using upper 95% CI of LATE estimate, adjusting for attenuation bias) by Aug 15") ///
	value("`cost_per_job_saved_upperCI'")
	
yamlout using "${root}/results/paper numbers/`category'/PPP estimates.yaml", ///
	key("ppp_cost_per_job_saved_w_UI") ///
	comment("Cost per job saved (using LATE estimate, adjusting for attenuation bias and UI benefits) by Aug 15") ///
	value("`cost_per_job_saved_w_UI'") 

yamlout using "${root}/results/paper numbers/`category'/PPP estimates.yaml", ///
	key("ppp_cost_per_job_saved_upperCI_w_UI") ///
	comment("Cost per job saved (using upper 95% CI of LATE estimate, adjusting for attenuation bias and UI benefits) by Aug 15") ///
	value("`cost_per_job_saved_upperCI_w_UI'")
	
yamlout using "${root}/results/paper numbers/`category'/PPP estimates.yaml", ///
	key("ppp_jobs_saved_all") ///
	comment("Jobs saved (millions, adjusting for attenuation bias and including NAICS 72) by Aug 15") ///
	value("`jobs_saved_w_naics72'") 
	
project, creates("${root}/results/paper numbers/`category'/PPP estimates.yaml")


********************************************************************************
**# 4. Regression table 
********************************************************************************
clear 
set obs 2

gen beta = ""
gen se = ""

replace beta = "`beta_eligible'" in 1
replace se = "(" + "`se_eligible'" + ")" in 1

replace beta = "`beta_eligible_alt'" in 2
replace se = "(" + "`se_eligible_alt'" + ")" in 2

* Export
export excel "${root}/results/new_app_table_12.xlsx", sheet(new_app_table_12, replace)
project, creates("${root}/results/new_app_table_12.xlsx")
